# Default chunk options applied while the codebook is knitted.
knitr::opts_chunk$set(
  # Surface warnings raised during codebook generation.
  warning = TRUE,
  # Surface messages raised during codebook generation.
  message = TRUE,
  # Keep knitting when a chunk errors instead of stopping; usually easier
  # to debug.
  error = TRUE,
  # Print the R code next to its output.
  echo = TRUE
)

# Make the black-and-white theme the ggplot2 default for later plots.
ggplot2::theme_set(ggplot2::theme_bw())
library(rio)
library(labelled)
library(codebook)
##
## Attaching package: 'codebook'
## The following object is masked from 'package:labelled':
##
## to_factor
# Read the paired priming-trial data written by the data-processing step.
codebook_data <- import("../data_processing/output_data/priming_data/sr_prime_trials.csv")

# Helper for regenerating the skeleton of the label list below:
# cat(paste(names(codebook_data), collapse = " = '', \n"))

# Attach a human-readable label to every column. codebook() prints these
# labels above each variable summary and copies them into the JSON-LD
# `variableMeasured` descriptions.
var_label(codebook_data) <- list(
  word_combo = "The combination of the target word with the cue word - used to help combine information across trials.",
  unique_trial = "Unique trial number for each participant - was used to help make sure the cue-target lined up correctly since they were presented sequentially.",
  observation = "Unique participant ID number.",
  cue_word = "The cue word shown in the priming trial. ",
  cue_type = "The type of trial shown for the cue - should always be word for priming trials.",
  cue_correct = "If the cue was answered correctly.",
  trial_order = "What order the trials were shown in - used with the unique_trial to line up trials.",
  target_duration = "The duration in milliseconds of the entire trial from time shown to time end. This variable was set to NA if the trial was incorrectly answered, too long (3000+ms) or too short (<=160ms). The original duration is also preserved.",
  target_word = "The target word shown to the participant.",
  target_type = "The type of trial shown for the target - should always be word for priming trials.",
  target_correct = "If the target was answered correctly.",
  # NOTE(review): the previous label stopped mid-sentence ("The z-scored RT
  # for the "). Completed here; confirm the intended wording and whether the
  # z-scoring scope (e.g. per participant) should be stated.
  target_Z_RT = "The z-scored RT for the target.",
  keep_target = "If the trial level data should be kept based on our exclusion rules (not too long < 3000 ms, not too short > 160ms, correctly answered).",
  keep_participant = "If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials SEEN.",
  keep_participant_answered = "If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials ANSWERED.",
  cue_end_of_trial = "How the trial ended as response or timeout.",
  target_end_of_trial = "How the trial ended as response or timeout.",
  target_original_duration = "The duration in milliseconds of the entire trial from time shown to time end without our exclusions applied (i.e., all data).",
  type = "If the trial type was related or unrelated. "
)
# Dataset-level metadata. These fields appear in the header of the rendered
# codebook and in the JSON-LD block it emits, in the order assigned here.
metadata(codebook_data)$name <- "Semantic Priming Across Many Languages Priming Level Trials"
# The description is one string holding two paragraphs: what this file
# contains, followed by the study abstract.
metadata(codebook_data)$description <- "This dataset includes the paired trial level data for the priming trials in the SPAML study. The data has been taken from long format where each trial is one row of data, to semi-long format, wherein each paired trial is one line of data. For example if a cue was CAT and the target was DOG, the full trial data includes each as a separate row. This dataset pairs them together to denote which combinations were related and unrelated, as well as which words were targets and cues.
Semantic priming has been studied for nearly 50 years across various experimental manipulations and theoretical frameworks. These studies provide insight into the cognitive underpinnings of semantic representations in both healthy and clinical populations; however, they have suffered from several issues including generally low sample sizes and a lack of diversity in linguistic implementations. Here, we will test the size and the variability of the semantic priming effect across ten languages by creating a large database of semantic priming values, based on an adaptive sampling procedure. Differences in response latencies between related word-pair conditions and unrelated word-pair conditions (i.e., difference score confidence interval is greater than zero) will allow quantifying evidence for semantic priming, whereas improvements in model fit with the addition of a random intercept for language will provide support for variability in semantic priming across languages."
# DOI of the data release; the same DOI closes the citation string below.
metadata(codebook_data)$identifier <- "https://doi.org/10.5281/zenodo.10888833"
metadata(codebook_data)$creator <- "Erin M. Buchanan"
metadata(codebook_data)$citation <- "Buchanan, E., Cuccolo, K., Heyman, T., Iyer, A., Coles, N., Lewis Jr, N., Peters, K., van Berkel, N., Taylor, J., Van't Veer, A. E., Montefinese, M., Valentine, K. D., Maxwell, N., Türkan, B. N., Williams, G., Oliveros-Chacana, J. C., Röer, J., Fini, C., Acar, O., … Lewis, S. C. (2024). SemanticPriming/SPAML: SPAML v1 Data Release (v1.0.0) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.10888833"
# Where the released files can be downloaded.
metadata(codebook_data)$url <- "https://github.com/SemanticPriming/SPAML/releases/"
metadata(codebook_data)$datePublished <- "2024-05-01"
# When and where the data were collected.
metadata(codebook_data)$temporalCoverage <- "2022-2024"
metadata(codebook_data)$spatialCoverage <- "Online"
# Render the codebook from the labelled data and the metadata set above.
codebook(codebook_data)
Dataset name: Semantic Priming Across Many Languages Priming Level Trials
This dataset includes the paired trial level data for the priming trials in the SPAML study. The data has been taken from long format where each trial is one row of data, to semi-long format, wherein each paired trial is one line of data. For example if a cue was CAT and the target was DOG, the full trial data includes each as a separate row. This dataset pairs them together to denote which combinations were related and unrelated, as well as which words were targets and cues.
Semantic priming has been studied for nearly 50 years across various experimental manipulations and theoretical frameworks. These studies provide insight into the cognitive underpinnings of semantic representations in both healthy and clinical populations; however, they have suffered from several issues including generally low sample sizes and a lack of diversity in linguistic implementations. Here, we will test the size and the variability of the semantic priming effect across ten languages by creating a large database of semantic priming values, based on an adaptive sampling procedure. Differences in response latencies between related word-pair conditions and unrelated word-pair conditions (i.e., difference score confidence interval is greater than zero) will allow quantifying evidence for semantic priming, whereas improvements in model fit with the addition of a random intercept for language will provide support for variability in semantic priming across languages.
Temporal Coverage: 2022-2024
Spatial Coverage: Online
Citation: Buchanan, E., Cuccolo, K., Heyman, T., Iyer, A., Coles, N., Lewis Jr, N., Peters, K., van Berkel, N., Taylor, J., Van’t Veer, A. E., Montefinese, M., Valentine, K. D., Maxwell, N., Türkan, B. N., Williams, G., Oliveros-Chacana, J. C., Röer, J., Fini, C., Acar, O., … Lewis, S. C. (2024). SemanticPriming/SPAML: SPAML v1 Data Release (v1.0.0) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.10888833
Identifier: https://doi.org/10.5281/zenodo.10888833
Date published: 2024-05-01
Creator:
| name | value |
|---|---|
| 1 | Erin M. Buchanan |
|
# Variables
The combination of the target word with the cue word - used to help combine information across trials.
Distribution of values for word_combo
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| word_combo | The combination of the target word with the cue word - used to help combine information across trials. | character | 0 | 1 | 2000 | 0 | 4 | 29 | 0 |
Unique trial number for each participant - was used to help make sure the cue-target lined up correctly since they were presented sequentially.
Distribution of values for unique_trial
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| unique_trial | Unique trial number for each participant - was used to help make sure the cue-target lined up correctly since they were presented sequentially. | character | 0 | 1 | 95598 | 0 | 16 | 18 | 0 |
Unique participant ID number.
Distribution of values for observation
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| observation | Unique participant ID number. | character | 0 | 1 | 681 | 0 | 14 | 14 | 0 |
The cue word shown in the priming trial.
Distribution of values for cue_word
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| cue_word | The cue word shown in the priming trial. | character | 0 | 1 | 989 | 0 | 2 | 17 | 0 |
The type of trial shown for the cue - should always be word for priming trials.
Distribution of values for cue_type
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| cue_type | The type of trial shown for the cue - should always be word for priming trials. | character | 0 | 1 | 1 | 0 | 4 | 4 | 0 |
If the cue was answered correctly.
Distribution of values for cue_correct
241 missing values.
| name | label | data_type | n_missing | complete_rate | count | mean |
|---|---|---|---|---|---|---|
| cue_correct | If the cue was answered correctly. | logical | 241 | 0.997479 | TRU: 91375, FAL: 3982 | 0.9582411 |
What order the trials were shown in - used with the unique_trial to line up trials.
Distribution of values for trial_order
0 missing values.
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| trial_order | What order the trials were shown in - used with the unique_trial to line up trials. | numeric | 0 | 1 | 1 | 194 | 400 | 196.3604 | 115.2478 | ▇▇▇▇▇ |
The duration in milliseconds of the entire trial from time shown to time end. This variable was set to NA if the trial was incorrectly answered, too long (3000+ms) or too short (<=160ms). The original duration is also preserved.
Distribution of values for target_duration
0 missing values.
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| target_duration | The duration in milliseconds of the entire trial from time shown to time end. This variable was set to NA if the trial was incorrectly answered, too long (3000+ms) or too short (<=160ms). The original duration is also preserved. | numeric | 0 | 1 | 164 | 641 | 2974 | 725.0498 | 300.8918 | ▇▃▁▁▁ |
The target word shown to the participant.
Distribution of values for target_word
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| target_word | The target word shown to the participant. | character | 0 | 1 | 987 | 0 | 2 | 19 | 0 |
The type of trial shown for the target - should always be word for priming trials.
Distribution of values for target_type
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| target_type | The type of trial shown for the target - should always be word for priming trials. | character | 0 | 1 | 1 | 0 | 4 | 4 | 0 |
If the target was answered correctly.
Distribution of values for target_correct
0 missing values.
| name | label | data_type | n_missing | complete_rate | count | mean |
|---|---|---|---|---|---|---|
| target_correct | If the target was answered correctly. | logical | 0 | 1 | TRU: 95598 | 1 |
The z-scored RT for the target.
Distribution of values for target_Z_RT
0 missing values.
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| target_Z_RT | The z-scored RT for the target. | numeric | 0 | 1 | -2.8 | -0.64 | 11 | -0.4523956 | 0.7619565 | ▇▂▁▁▁ |
If the trial level data should be kept based on our exclusion rules (not too long < 3000 ms, not too short > 160ms, correctly answered).
Distribution of values for keep_target
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| keep_target | If the trial level data should be kept based on our exclusion rules (not too long < 3000 ms, not too short > 160ms, correctly answered). | character | 0 | 1 | 1 | 0 | 4 | 4 | 0 |
If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials SEEN.
Distribution of values for keep_participant
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| keep_participant | If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials SEEN. | character | 0 | 1 | 1 | 0 | 4 | 4 | 0 |
If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials ANSWERED.
Distribution of values for keep_participant_answered
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| keep_participant_answered | If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials ANSWERED. | character | 0 | 1 | 1 | 0 | 4 | 4 | 0 |
How the trial ended as response or timeout.
Distribution of values for cue_end_of_trial
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| cue_end_of_trial | How the trial ended as response or timeout. | character | 0 | 1 | 2 | 0 | 7 | 8 | 0 |
How the trial ended as response or timeout.
Distribution of values for target_end_of_trial
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| target_end_of_trial | How the trial ended as response or timeout. | character | 0 | 1 | 1 | 0 | 8 | 8 | 0 |
The duration in milliseconds of the entire trial from time shown to time end without our exclusions applied (i.e., all data).
Distribution of values for target_original_duration
0 missing values.
| name | label | data_type | n_missing | complete_rate | min | median | max | mean | sd | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| target_original_duration | The duration in milliseconds of the entire trial from time shown to time end without our exclusions applied (i.e., all data). | numeric | 0 | 1 | 164 | 641 | 2974 | 725.0498 | 300.8918 | ▇▃▁▁▁ |
If the trial type was related or unrelated.
Distribution of values for type
0 missing values.
| name | label | data_type | n_missing | complete_rate | n_unique | empty | min | max | whitespace |
|---|---|---|---|---|---|---|---|---|---|
| type | If the trial type was related or unrelated. | character | 0 | 1 | 2 | 0 | 7 | 9 | 0 |
The following JSON-LD can be found by search engines, if you share this codebook publicly on the web.
{
"name": "Semantic Priming Across Many Languages Priming Level Trials",
"description": "This dataset includes the paired trial level data for the priming trials in the SPAML study. The data has been taken from long format where each trial is one row of data, to semi-long format, wherein each paired trial is one line of data. For example if a cue was CAT and the target was DOG, the full trial data includes each as a separate row. This dataset pairs them together to denote which combinations were related and unrelated, as well as which words were targets and cues. \n\nSemantic priming has been studied for nearly 50 years across various experimental manipulations and theoretical frameworks. These studies provide insight into the cognitive underpinnings of semantic representations in both healthy and clinical populations; however, they have suffered from several issues including generally low sample sizes and a lack of diversity in linguistic implementations. Here, we will test the size and the variability of the semantic priming effect across ten languages by creating a large database of semantic priming values, based on an adaptive sampling procedure. Differences in response latencies between related word-pair conditions and unrelated word-pair conditions (i.e., difference score confidence interval is greater than zero) will allow quantifying evidence for semantic priming, whereas improvements in model fit with the addition of a random intercept for language will provide support for variability in semantic priming across languages.\n\n\n## Table of variables\nThis table contains variable names, labels, and number of missing values.\nSee the complete codebook for more.\n\n[truncated]\n\n### Note\nThis dataset was automatically described using the [codebook R package](https://rubenarslan.github.io/codebook/) (version 0.9.2).",
"identifier": "https://doi.org/10.5281/zenodo.10888833",
"creator": "Erin M. Buchanan",
"citation": "Buchanan, E., Cuccolo, K., Heyman, T., Iyer, A., Coles, N., Lewis Jr, N., Peters, K., van Berkel, N., Taylor, J., Van't Veer, A. E., Montefinese, M., Valentine, K. D., Maxwell, N., Türkan, B. N., Williams, G., Oliveros-Chacana, J. C., Röer, J., Fini, C., Acar, O., … Lewis, S. C. (2024). SemanticPriming/SPAML: SPAML v1 Data Release (v1.0.0) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.10888833",
"url": "https://github.com/SemanticPriming/SPAML/releases/",
"datePublished": "2024-05-01",
"temporalCoverage": "2022-2024",
"spatialCoverage": "Online",
"keywords": ["word_combo", "unique_trial", "observation", "cue_word", "cue_type", "cue_correct", "trial_order", "target_duration", "target_word", "target_type", "target_correct", "target_Z_RT", "keep_target", "keep_participant", "keep_participant_answered", "cue_end_of_trial", "target_end_of_trial", "target_original_duration", "type"],
"@context": "http://schema.org/",
"@type": "Dataset",
"variableMeasured": [
{
"name": "word_combo",
"description": "The combination of the target word with the cue word - used to help combine information across trials.",
"@type": "propertyValue"
},
{
"name": "unique_trial",
"description": "Unique trial number for each participant - was used to help make sure the cue-target lined up correctly since they were presented sequentially.",
"@type": "propertyValue"
},
{
"name": "observation",
"description": "Unique participant ID number.",
"@type": "propertyValue"
},
{
"name": "cue_word",
"description": "The cue word shown in the priming trial. ",
"@type": "propertyValue"
},
{
"name": "cue_type",
"description": "The type of trial shown for the cue - should always be word for priming trials.",
"@type": "propertyValue"
},
{
"name": "cue_correct",
"description": "If the cue was answered correctly.",
"@type": "propertyValue"
},
{
"name": "trial_order",
"description": "What order the trials were shown in - used with the unique_trial to line up trials.",
"@type": "propertyValue"
},
{
"name": "target_duration",
"description": "The duration in milliseconds of the entire trial from time shown to time end. This variable was set to NA if the trial was incorrectly answered, too long (3000+ms) or too short (<=160ms). The original duration is also preserved.",
"@type": "propertyValue"
},
{
"name": "target_word",
"description": "The target word shown to the participant.",
"@type": "propertyValue"
},
{
"name": "target_type",
"description": "The type of trial shown for the target - should always be word for priming trials.",
"@type": "propertyValue"
},
{
"name": "target_correct",
"description": "If the target was answered correctly.",
"@type": "propertyValue"
},
{
"name": "target_Z_RT",
"description": "The z-scored RT for the target.",
"@type": "propertyValue"
},
{
"name": "keep_target",
"description": "If the trial level data should be kept based on our exclusion rules (not too long < 3000 ms, not too short > 160ms, correctly answered).",
"@type": "propertyValue"
},
{
"name": "keep_participant",
"description": "If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials SEEN.",
"@type": "propertyValue"
},
{
"name": "keep_participant_answered",
"description": "If the participant should be kept based on our exclusion rules: must be 18 years old, saw at least 100 trials, correctly answered at least 80% of the trials ANSWERED.",
"@type": "propertyValue"
},
{
"name": "cue_end_of_trial",
"description": "How the trial ended as response or timeout.",
"@type": "propertyValue"
},
{
"name": "target_end_of_trial",
"description": "How the trial ended as response or timeout.",
"@type": "propertyValue"
},
{
"name": "target_original_duration",
"description": "The duration in milliseconds of the entire trial from time shown to time end without our exclusions applied (i.e., all data).",
"@type": "propertyValue"
},
{
"name": "type",
"description": "If the trial type was related or unrelated. ",
"@type": "propertyValue"
}
]
}